############################## Config #########################################
# Workflow-level container image. The export_zarr rule further down declares
# its own image instead of this one.
container: "docker://satijalab/azimuth-references:latest"

############################## All ############################################
# Aggregate target: requests the exported reference object, its annoy index,
# and the demo expression matrix.
rule all:
    input:
        "reference/ref.Rds",
        "reference/idx.annoy",
        "reference/pbmc_10k_v3_filtered_feature_bc_matrix.h5",

############################## Reference ######################################
# Fetch the PBMC multimodal reference object and its two cell-list files into
# data/, then write a timestamped note to logs/download_data.log. That log file
# is declared as a regular output of the rule.
rule download:
    output:
        "logs/download_data.log",
        "data/pbmc_multimodal.h5seurat",
        "data/pbmc_multimodal_reference_mapping_cells.txt",
        "data/pbmc_multimodal_reference_plotting_cells.txt"
    params:
        # Source locations; wget saves each file under its URL basename, which
        # is what ties these URLs to the output paths declared above.
        data_url = "https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat",
        mapping_cells_url = "https://www.dropbox.com/s/6gumkybwtqq0k0w/pbmc_multimodal_reference_mapping_cells.txt",
        plotting_cells_url = "https://www.dropbox.com/s/36m0addaeohgcf6/pbmc_multimodal_reference_plotting_cells.txt"
    shell:
        """
        wget {params.data_url} -P data
        wget {params.mapping_cells_url} -P data
        wget {params.plotting_cells_url} -P data
        echo "PBMC reference data downloaded on: $(date)" > logs/download_data.log
        """

# Run scripts/export.R on the downloaded reference object and the two
# cell-list files. The declared outputs are not passed to the script, so it
# presumably writes reference/ref.Rds and reference/idx.annoy itself
# (confirm against scripts/export.R).
rule export:
    input:
        script = "scripts/export.R",
        data = "data/pbmc_multimodal.h5seurat",
        mapping_cells = "data/pbmc_multimodal_reference_mapping_cells.txt",
        plotting_cells = "data/pbmc_multimodal_reference_plotting_cells.txt"
    output:
        "reference/ref.Rds",
        "reference/idx.annoy"
    log:
        # Declared as a log so Snakemake creates logs/ before the job runs;
        # the redirect below no longer relies on another rule having made it.
        "logs/export.Rout"
    shell:
        """
        Rscript {input.script} {input.data} {input.mapping_cells} {input.plotting_cells} > {log} 2>&1
        """

############################## Demo  ##########################################
# Fetch the PBMC 10k v3 demo matrix into reference/ and write a timestamped
# note to logs/download_demo_data.log, which is declared as a regular output.
rule download_demo:
    output:
        "logs/download_demo_data.log",
        "reference/pbmc_10k_v3_filtered_feature_bc_matrix.h5"
    params:
        # wget saves the file under the URL basename, matching the output path.
        url = "https://www.dropbox.com/s/cmbvq2og93lnl9z/pbmc_10k_v3_filtered_feature_bc_matrix.h5"
    shell:
        """
        wget {params.url} -P reference
        echo "PBMC demo data downloaded on: $(date)" > logs/download_demo_data.log
        """

# Download the GSE164378 raw archive from GEO, unpack it into data/, and
# delete the extracted files that match the TCR, BCR, 5P, ADT_3P and HTO_3P
# patterns. The three RNA_3P files declared as outputs are what should remain.
rule download_pbmc_expression:
    params:
        # The single quotes are part of the value: the URL contains '?' and
        # '&', which the shell would otherwise interpret.
        url = "'https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE164378&format=file'"
    output:
        "data/GSM5008737_RNA_3P-matrix.mtx.gz",
        "data/GSM5008737_RNA_3P-barcodes.tsv.gz",
        "data/GSM5008737_RNA_3P-features.tsv.gz"
    shell:
        # rm -f: a pattern that matches nothing must not fail the rule after
        # the archive has already been extracted successfully.
        """
        wget -O data/GSE164378_RAW.tar {params.url}
        tar -xvf data/GSE164378_RAW.tar -C data/
        rm -f data/*TCR* data/*BCR* data/*5P* data/*ADT_3P* data/*HTO_3P*
        """

# Two-step conversion run in the rule's own container image:
# scripts/convert_to_h5ad.R is given the reference, the full reference object,
# the RNA matrix/barcodes/features files and the h5Seurat output path; then
# scripts/convert_to_zarr.py is given the h5ad and zarr paths. The h5ad path is
# not passed to the R script, so that script presumably derives it from the
# h5Seurat path (confirm against scripts/convert_to_h5ad.R).
rule export_zarr:
    input:
        ref = "reference/ref.Rds",
        fullref = "data/pbmc_multimodal.h5seurat",
        rna_mat = "data/GSM5008737_RNA_3P-matrix.mtx.gz",
        rna_cells = "data/GSM5008737_RNA_3P-barcodes.tsv.gz",
        rna_features = "data/GSM5008737_RNA_3P-features.tsv.gz",
        script1 = "scripts/convert_to_h5ad.R",
        script2 = "scripts/convert_to_zarr.py"
    output:
        h5Seurat = "vitessce/vitessce_ref.h5Seurat",
        h5ad = "vitessce/vitessce_ref.h5ad",
        zarr = directory("vitessce/vitessce_ref.zarr")
    log:
        # Declared as a log so Snakemake creates logs/ before the job runs;
        # the R step's redirect no longer relies on another rule having made it.
        "logs/export_zarr_anndata.Rout"
    container:
        "docker://satijalab/azimuth-references:vitessce"
    shell:
        """
        mkdir -p vitessce
        Rscript {input.script1} {input.ref} {input.fullref} {input.rna_mat} {input.rna_cells} {input.rna_features} {output.h5Seurat} > {log} 2>&1
        python3 {input.script2} {output.h5ad} {output.zarr}
        """

